Netflix’s Stock Price Prediction Using Time Series Analysis

Exploratory Data Analysis

Libraries and Dataset

# Import Libraries
# Note: quantmod was previously attached twice; the duplicate call was removed.

library(knitr)       # report generation / kable()
library(psych)       # descriptive statistics
library(kableExtra)  # HTML table styling
library(tidyquant)   # tidy financial analysis helpers
library(quantmod)    # getSymbols(), Ad(), xts helpers
library(tseries)     # adf.test()
library(tidyverse)   # data manipulation + ggplot2
library(GGally)      # ggplot2 extensions
library(ggplot2)     # plotting
library(fpp2)        # forecasting utilities
library(dplyr)       # data manipulation verbs
library(astsa)       # sarima()
library(NTS)         # backtest()
library(rugarch)     # ugarchspec()/ugarchfit()/ugarchboot()
library(forecast)    # auto.arima(), forecast(), checkresiduals()
# Import Dataset

# Download five years of daily NFLX OHLCV data from Yahoo Finance.
# With auto.assign = FALSE the object is returned directly, so the
# env = .GlobalEnv argument of the original call was redundant (it is
# ignored in that case) and has been dropped.
nflx_df <- getSymbols(Symbols = 'NFLX', src = 'yahoo', from = '2016-11-15', to = '2021-11-16', auto.assign = FALSE)
head(nflx_df)
##            NFLX.Open NFLX.High NFLX.Low NFLX.Close NFLX.Volume NFLX.Adjusted
## 2016-11-15    114.55    116.41   113.09     113.59     7445100        113.59
## 2016-11-16    112.96    116.12   111.81     115.19     5933700        115.19
## 2016-11-17    115.13    116.81   113.56     115.03     6232700        115.03
## 2016-11-18    115.73    116.42   113.51     115.21     6746800        115.21
## 2016-11-21    116.20    118.72   116.19     117.96     7064500        117.96
## 2016-11-22    118.32    119.46   116.98     118.04     7007200        118.04
tail(nflx_df)
##            NFLX.Open NFLX.High NFLX.Low NFLX.Close NFLX.Volume NFLX.Adjusted
## 2021-11-08    650.29    656.00   643.79     651.45     2887500        651.45
## 2021-11-09    653.70    660.50   650.52     655.99     2415600        655.99
## 2021-11-10    653.01    660.33   642.11     646.91     2405800        646.91
## 2021-11-11    650.24    665.82   649.71     657.58     2868300        657.58
## 2021-11-12    660.01    683.34   653.82     682.61     4192700        682.61
## 2021-11-15    681.24    685.26   671.49     679.33     2872200        679.33
# Create new dataframe to preserve original dataset and assign new variable.
nflx <- nflx_df
# Check for Missing Variables
# Zero counts in every column confirm there are no missing values to impute.
colSums(is.na(nflx))
##     NFLX.Open     NFLX.High      NFLX.Low    NFLX.Close   NFLX.Volume 
##             0             0             0             0             0 
## NFLX.Adjusted 
##             0
# Check structure to determine the shape and data types of the data set
# The data is an xts (time-indexed) matrix: 1259 daily rows x 6 numeric columns.
str(nflx)
## An 'xts' object on 2016-11-15/2021-11-15 containing:
##   Data: num [1:1259, 1:6] 115 113 115 116 116 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr [1:6] "NFLX.Open" "NFLX.High" "NFLX.Low" "NFLX.Close" ...
##   Indexed by objects of class: [Date] TZ: UTC
##   xts Attributes:  
## List of 2
##  $ src    : chr "yahoo"
##  $ updated: POSIXct[1:1], format: "2021-12-05 21:53:55"

Moving forward, as specified in the exploratory data analysis section of the paper, research shows that the adjusted closing price is the most accurate reflection of the true value of a stock. On most stock price websites that do not disclose both the closing and adjusted closing prices, the closing price shown is the adjusted closing price. Based on this, NFLX.Adjusted and the dates will be used to predict stock prices.

# Keep only the adjusted closing prices; Ad() selects the *.Adjusted column
# from an OHLCV xts object.
adj_nflx <- Ad(nflx)
head(adj_nflx)
##            NFLX.Adjusted
## 2016-11-15        113.59
## 2016-11-16        115.19
## 2016-11-17        115.03
## 2016-11-18        115.21
## 2016-11-21        117.96
## 2016-11-22        118.04
# Five-number summary of both the date index and the price series.
summary(adj_nflx)
##      Index            NFLX.Adjusted  
##  Min.   :2016-11-15   Min.   :113.6  
##  1st Qu.:2018-02-15   1st Qu.:262.9  
##  Median :2019-05-20   Median :348.5  
##  Mean   :2019-05-18   Mean   :351.1  
##  3rd Qu.:2020-08-17   3rd Qu.:485.1  
##  Max.   :2021-11-15   Max.   :690.3

The summary shows that within the span of five years, Netflix’s stock price has fluctuated over a range of 576.7 (from a minimum of 113.6 to a maximum of 690.3), with the average being around 351.1.

# Stack two panels vertically: traded volume on top, adjusted price below.
par(mfrow=c(2,1))
plot(nflx$NFLX.Volume, type = 'l', ylab = 'Volume', main = NULL, cex.axis = .5)
mtext(side = 3, text = 'Volume of Netflix Stock Over Time', line = 1.5, cex = 1, font = 2)

# Adjusted close over the same window for visual comparison with volume.
plot(adj_nflx, type = 'l', ylab = 'Close Prices', main = NULL, cex.axis = .5)
mtext(side = 3, text = 'Close Prices over Time', line = 1.5, cex = 1, font = 2)

ADF (Augmented Dickey-Fuller) Test. Null hypothesis H0: the time series is non-stationary. Alternative hypothesis HA: the time series is stationary. If the p-value is less than the significance level (0.05), reject the null hypothesis and conclude that the time series is stationary. Since the p-value is 0.5738, we fail to reject the null hypothesis. This indicates that the time series is non-stationary.

# ADF test on the raw adjusted prices; p = 0.5738 > 0.05, so we fail to reject
# non-stationarity and the series must be differenced before ARIMA modeling.
print(adf.test(adj_nflx))
## 
##  Augmented Dickey-Fuller Test
## 
## data:  adj_nflx
## Dickey-Fuller = -2.0107, Lag order = 10, p-value = 0.5738
## alternative hypothesis: stationary

Differencing

# First difference of the logged series (daily log returns).
# diff() introduces a leading NA; drop it with na.omit() rather than
# back-filling it with na.locf(fromLast = TRUE), which fabricated a
# duplicate of the first observed return in the original code.
adf_diff <- na.omit(diff(log(adj_nflx)))
plot(adf_diff, main = NULL)
mtext(side = 3, text = 'Logged Difference', line = 1.5, cex = 1, font = 2)

ACF (Autocorrelation Function)

# ACF
# ACF of the logged first differences; significant spikes suggest MA terms.
acf_nflx = acf(adf_diff, main = 'ACF')

PACF (Partial Autocorrelation Function)

# PACF
# PACF of the logged first differences; a tapering pattern points to MA models.
pacf_nflx = pacf(adf_diff, main = 'PACF')

Adjusted Closing Price vs Difference

The adjusted closing price shows a relatively steady rise over time. When taking the first difference, there are some spikes within the data. Because of this, we will take the logged difference to see if there is less variance.

# Compare the level series with its first difference, stacked vertically.
par(mfrow = c(2,1))
plot(adj_nflx, main = 'Adjusted Closing Price', col = 'cornflowerblue')
# Differencing removes the trend but leaves visible volatility spikes.
plot(diff(adj_nflx), main = 'Difference Closing Price', col = 'cornflowerblue')

Decomposition of Additive Time Series. This plot shows a steady rise in the trend. The seasonal component also appears to follow a recurring pattern.

# Decomposition of Additive Time Series

# Convert to a ts object. The data begin on 2016-11-15, which is day 320 of
# 2016 (a leap year), so start = c(2016, 320) with frequency = 365.
# The original code used start = 2015-11-15, which R evaluates as the
# arithmetic expression 2015 - 11 - 15 = 1989 -- and the year was wrong too.
# NOTE(review): daily *trading* data has ~252 observations per year;
# frequency = 365 is kept to preserve the original analysis design.
nflx_ts <- ts(adj_nflx, frequency = 365, start = c(2016, 320))
nflx_de <- decompose(nflx_ts)
plot(nflx_de)

Logged Difference

The graphs show that the logged difference shows a more balanced variance. Thus, we will explore log_diff for modeling.

Observation/Note: Volatility is still observed even after differencing.

# Calculate the First Degree Difference
# NOTE(review): the name `diff` shadows base R's diff() function. Calls such
# as diff(log(adj_nflx)) still resolve to the function (R searches the
# function namespace for calls), but renaming the variable (e.g. diff1)
# would be safer.
diff = diff(adj_nflx)
log_diff = diff(log(adj_nflx))
sqrt_diff = diff(sqrt(adj_nflx))

# Raw, logged, and square-root first differences stacked to compare how well
# each transformation stabilizes the variance.
par(mfrow = c(3,1))
plot(diff, type = 'l', xlab = 'Time', ylab = 'Difference', main = 'First Degree Difference - Raw Data')
plot(log_diff, type = 'l', xlab = 'Time', ylab = 'Logged Difference', main = 'First Degree Difference - Logged Data')
plot(sqrt_diff, type = 'l', xlab = 'Time', ylab = 'Square Root Difference', main = 'First Degree Difference - Square-Root Data')

ACF First Degree Difference

# ACF/PACF of the square-root first differences; na.action = na.pass lets the
# functions skip the leading NA produced by differencing instead of erroring.
par(mfrow = c(2,1))
acf(sqrt_diff, main = 'Autocorrelation Function of First Differences', na.action = na.pass)
pacf(sqrt_diff, lag.max = 31, main = 'Partial Autocorrelation Function of First Differences', na.action = na.pass)

Second Degree Differencing

Logged difference with second degree shows a more balanced variance. Thus, we will use log_diff2 for modeling.

# Calculate the Second Degree Difference

# Compute the second differences directly from the source series using
# diff(x, differences = 2). This is equivalent to differencing twice but
# avoids relying on the variable named `diff`, which shadows base R's
# diff() function and made the original diff(diff) call fragile.
diff2 <- diff(adj_nflx, differences = 2)
log_diff2 <- diff(log(adj_nflx), differences = 2)
sqrt_diff2 <- diff(sqrt(adj_nflx), differences = 2)

par(mfrow = c(3,1))
plot(diff2, type = 'l', xlab = 'Time', ylab = '2nd Diff', main = 'Second Degree Difference - Raw Data')
plot(log_diff2, type = 'l', xlab = 'Time', ylab = '2nd Log Diff', main = 'Second Degree Difference - Logged Data')
plot(sqrt_diff2, type = 'l', xlab = 'Time', ylab = '2nd Diff - Square-Root', main = 'Second Degree Difference - Square-Root Data')

# Compare raw prices with log-transformed prices; the log scale compresses the
# exponential-looking growth into a near-linear trend.
par(mfrow = c(2, 1))

plot(nflx, ylab = 'QEPS', type = 'o', col = 4, cex = .5, main = NULL)
mtext(side = 3, text = 'Stock Price Growth', line = 1.5, cex = 1, font = 2)
plot(log(nflx), ylab = 'log(QEPS)', type = 'o', col = 4, cex = .5, main = NULL)

Results

  1. Per EDA process, it’s noted that differencing with logging showed better stationarity. Thus, we will be using log_diff for first differencing and log_diff2 for second differencing for our models.

  2. Parameters for the models will be chosen by ACF and PACF plots.

  3. The models with different parameters will be tested. In addition, auto.arima will be tested for choosing the best models.

  4. Diagnostic measures will be performed to see the residual’s correlation. Since we have noticed high volatility in the EDA process, we will move on to GARCH models to confirm the volatility of Netflix stock prices.

MODELS FOR FIRST DEGREE DIFFERENCING

ACF and PACF of diff_log

Per ACF, spikes at 1, 5, and 8. PACF tapers to zero. Thus, let’s try MA(1).

# Logged first differences (daily log returns) used for model identification.
FirstDiff = diff(log(adj_nflx))
par(mfrow=c(2,1))
acf(FirstDiff, na.action = na.pass, main ='ACF Plot of First Differencing', plot = TRUE)
pacf(FirstDiff, na.action = na.pass, main = NULL)

MODELS FOR SECOND DEGREE DIFFERENCING

ACF was cut off at lag 1. PACF tailed off. Let’s try MA(1)

# ACF/PACF of the second differences of the logged series.
par(mfrow=c(2,1))
acf(diff(FirstDiff), na.action = na.pass, main ='ACF Plot of Second Differencing')
pacf(diff(FirstDiff), na.action = na.pass, main = NULL)

Now, we assigned the models’ names by different parameters.

Model1 = MA(1) for first differencing; Model2 = MA(1) for second differencing; Model3 = auto.arima on the logged, differenced values.

# Model1 = MA(1) for First Differencing
# sarima(log(prices), 0, 1, 1) fits ARIMA(0,1,1) on the log scale and prints
# its optimizer trace plus diagnostic plots.
Model1 = sarima((log(adj_nflx)), 0,1,1)
## initial  value -3.733756 
## iter   2 value -3.736647
## iter   3 value -3.736667
## iter   3 value -3.736667
## iter   3 value -3.736667
## final  value -3.736667 
## converged
## initial  value -3.736665 
## iter   1 value -3.736665
## final  value -3.736665 
## converged

# Both the MA(1) term and the constant (drift) are significant at the 5% level.
Model1
## $fit
## 
## Call:
## arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, Q), period = S), 
##     xreg = constant, transform.pars = trans, fixed = fixed, optim.control = list(trace = trc, 
##         REPORT = 1, reltol = tol))
## 
## Coefficients:
##           ma1  constant
##       -0.0732    0.0014
## s.e.   0.0269    0.0006
## 
## sigma^2 estimated as 0.000568:  log likelihood = 2915.7,  aic = -5825.4
## 
## $degrees_of_freedom
## [1] 1256
## 
## $ttable
##          Estimate     SE t.value p.value
## ma1       -0.0732 0.0269 -2.7208  0.0066
## constant   0.0014 0.0006  2.2801  0.0228
## 
## $AIC
## [1] -4.630684
## 
## $AICc
## [1] -4.630676
## 
## $BIC
## [1] -4.618433
# Model2 = MA(1) for Second Differencing
# ARIMA(0,2,1) on the log scale; with d = 2 no constant is included.
Model2 = sarima((log(adj_nflx)), 0,2,1)
## initial  value -3.348754 
## iter   2 value -3.595890
## iter   3 value -3.657550
## iter   4 value -3.722692
## iter   5 value -3.725012
## iter   6 value -3.725610
## iter   7 value -3.725624
## iter   8 value -3.725685
## iter   9 value -3.725702
## iter  10 value -3.725703
## iter  10 value -3.725703
## final  value -3.725703 
## converged
## initial  value -3.727702 
## iter   2 value -3.730423
## iter   3 value -3.730512
## iter   4 value -3.730519
## iter   4 value -3.730519
## final  value -3.730519 
## converged

# NOTE(review): the MA(1) estimate of -1.000 sits on the invertibility
# boundary, a classic symptom of over-differencing.
Model2
## $fit
## 
## Call:
## arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, Q), period = S), 
##     include.mean = !no.constant, transform.pars = trans, fixed = fixed, optim.control = list(trace = trc, 
##         REPORT = 1, reltol = tol))
## 
## Coefficients:
##          ma1
##       -1.000
## s.e.   0.003
## 
## sigma^2 estimated as 0.0005718:  log likelihood = 2905.66,  aic = -5807.31
## 
## $degrees_of_freedom
## [1] 1256
## 
## $ttable
##     Estimate    SE   t.value p.value
## ma1       -1 0.003 -338.3381       0
## 
## $AIC
## [1] -4.619979
## 
## $AICc
## [1] -4.619977
## 
## $BIC
## [1] -4.611807
# Model3 = Auto.arima for logged, first differencing
# auto.arima searches over (p, d, q) by AICc and selects ARIMA(0,1,2) with
# drift -- close to Model1 but with an extra MA term.
Model3 <- auto.arima(log(adj_nflx))
summary(Model3)
## Series: log(adj_nflx) 
## ARIMA(0,1,2) with drift 
## 
## Coefficients:
##           ma1     ma2   drift
##       -0.0787  0.0430  0.0014
## s.e.   0.0283  0.0276  0.0006
## 
## sigma^2 estimated as 0.0005683:  log likelihood=2916.91
## AIC=-5825.82   AICc=-5825.79   BIC=-5805.27
## 
## Training set error measures:
##                        ME       RMSE        MAE           MPE      MAPE
## Training set 4.975572e-06 0.02380137 0.01665857 -6.499572e-05 0.2881782
##                   MASE       ACF1
## Training set 0.9922833 0.00128343

Diagnostic Test for Model3

# Diagnostic Checking
# Ljung-Box p-value of 0.0026 (< 0.05) indicates some autocorrelation remains
# in the residuals of the ARIMA(0,1,2)-with-drift model.
checkresiduals(Model3)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,2) with drift
## Q* = 21.964, df = 7, p-value = 0.002577
## 
## Model df: 3.   Total lags used: 10
# Forecast the next 30 steps on the log scale and plot the fan chart.
preds <- forecast(Model3, h = 30) 
autoplot(preds)

# Backtest ARIMA model
# NTS::backtest re-estimates the model on a rolling origin (starting at
# observation 80 -- presumably the initial training size; confirm against the
# NTS documentation) and reports out-of-sample RMSE/MAE/bias at horizons 1..30.
results <- backtest(Model3, log(adj_nflx), orig = 80, h = 30) 
## [1] "RMSE of out-of-sample forecasts"
##  [1] 0.02450318 0.03339095 0.04090226 0.04760317 0.05399938 0.05898188
##  [7] 0.06332254 0.06752260 0.07072667 0.07382688 0.07646482 0.07891052
## [13] 0.08161427 0.08436618 0.08725607 0.08984353 0.09277125 0.09545512
## [19] 0.09855400 0.10134673 0.10382177 0.10642313 0.10855922 0.11083121
## [25] 0.11284425 0.11497889 0.11711686 0.11913397 0.12102913 0.12294952
## [1] "Mean absolute error of out-of-sample forecasts"
##  [1] 0.01726316 0.02443677 0.03013491 0.03524700 0.03985101 0.04409672
##  [7] 0.04734963 0.05052035 0.05305025 0.05589673 0.05807326 0.06013008
## [13] 0.06244413 0.06496971 0.06735709 0.06992802 0.07247516 0.07473823
## [19] 0.07695234 0.07922764 0.08113685 0.08311863 0.08479285 0.08695604
## [25] 0.08855427 0.09029957 0.09217862 0.09391302 0.09527088 0.09680530
## [1] "Bias of out-of-sample forecasts"
##  [1] 0.001277000 0.002507773 0.003792010 0.005071183 0.006359524 0.007638182
##  [7] 0.008933103 0.010259864 0.011616813 0.012962110 0.014303527 0.015650820
## [13] 0.016971505 0.018271686 0.019583302 0.020906558 0.022232131 0.023552783
## [19] 0.024839846 0.026151707 0.027458753 0.028753056 0.030059930 0.031368503
## [25] 0.032646930 0.033954124 0.035292652 0.036624957 0.037960275 0.039282467
# NOTE(review): exponentiating the mean RMSE/MAE of log prices yields a rough
# multiplicative error factor (~1.089, i.e. ~8.9%), not an error in dollars.
print(paste("Average RMSE: ", round(exp(mean(results$rmse)), digits = 3)))
## [1] "Average RMSE:  1.089"
print(paste("Average MAE: ", round(exp(mean(results$mabso)), digits = 3)))
## [1] "Average MAE:  1.068"

Predicted Prices for Next 30 Days

# Convert the forecasts (made on the log scale) back to price levels.
preds_df = data.frame(preds)
# NOTE(review): exp() of a log-scale point forecast gives the median of the
# price distribution, not the mean -- acceptable here, but worth confirming.
predicted_prices <- exp(preds_df)
predicted_prices
##      Point.Forecast    Lo.80    Hi.80    Lo.95    Hi.95
## 1260       681.6063 661.0972 702.7515 650.4914 714.2094
## 1261       682.4630 654.6942 711.4095 640.4546 727.2267
## 1262       683.4341 649.5005 719.1405 632.2247 738.7914
## 1263       684.4065 645.3018 725.8811 625.5137 748.8443
## 1264       685.3804 641.7353 731.9938 619.7676 757.9394
## 1265       686.3557 638.6161 737.6640 614.7035 766.3598
## 1266       687.3323 635.8344 743.0012 610.1538 774.2731
## 1267       688.3103 633.3189 748.0768 606.0096 781.7882
## 1268       689.2898 631.0201 752.9401 602.1953 788.9805
## 1269       690.2706 628.9022 757.6272 598.6562 795.9050
## 1270       691.2528 626.9382 762.1651 595.3510 802.6028
## 1271       692.2364 625.1070 766.5747 592.2477 809.1061
## 1272       693.2214 623.3921 770.8727 589.3208 815.4403
## 1273       694.2078 621.7798 775.0726 586.5497 821.6260
## 1274       695.1956 620.2592 779.1855 583.9176 827.6801
## 1275       696.1848 618.8209 783.2207 581.4103 833.6168
## 1276       697.1755 617.4572 787.1860 579.0159 839.4479
## 1277       698.1675 616.1613 791.0881 576.7242 845.1837
## 1278       699.1610 614.9276 794.9326 574.5265 850.8329
## 1279       700.1558 613.7511 798.7247 572.4152 856.4032
## 1280       701.1521 612.6273 802.4687 570.3836 861.9012
## 1281       702.1498 611.5525 806.1685 568.4258 867.3328
## 1282       703.1489 610.5232 809.8274 566.5366 872.7033
## 1283       704.1494 609.5364 813.4485 564.7115 878.0172
## 1284       705.1514 608.5893 817.0346 562.9463 883.2787
## 1285       706.1548 607.6796 820.5880 561.2372 888.4917
## 1286       707.1596 606.8050 824.1111 559.5809 893.6593
## 1287       708.1659 605.9634 827.6058 557.9744 898.7848
## 1288       709.1735 605.1532 831.0740 556.4148 903.8708
## 1289       710.1826 604.3726 834.5173 554.8996 908.9201

GARCH Models - Look at Volatility of Stock Prices

GARCH Model1 with ARMA order (1,1)

# Specify a standard GARCH(1,1) variance process with an ARMA(1,1) mean
# equation and Student-t innovations, then fit it to the adjusted prices.
garch_spec_11 <- ugarchspec(
  variance.model = list(model = "sGARCH", garchOrder = c(1, 1)),
  mean.model = list(armaOrder = c(1, 1)),
  distribution.model = "std"
)
nflx_garch1 <- ugarchfit(spec = garch_spec_11, data = adj_nflx)
## Warning in arima(data, order = c(modelinc[2], 0, modelinc[3]), include.mean =
## modelinc[1], : possible convergence problem: optim gave code = 1
nflx_garch1
## 
## *---------------------------------*
## *          GARCH Model Fit        *
## *---------------------------------*
## 
## Conditional Variance Dynamics    
## -----------------------------------
## GARCH Model  : sGARCH(1,1)
## Mean Model   : ARFIMA(1,0,1)
## Distribution : std 
## 
## Optimal Parameters
## ------------------------------------
##          Estimate  Std. Error   t value Pr(>|t|)
## mu     113.690205    4.384903   25.9276 0.000000
## ar1      1.000000    0.000796 1256.1653 0.000000
## ma1     -0.059036    0.026992   -2.1871 0.028734
## omega    0.152889    0.102623    1.4898 0.136274
## alpha1   0.081915    0.012078    6.7820 0.000000
## beta1    0.917085    0.013259   69.1661 0.000000
## shape    4.211412    0.401749   10.4827 0.000000
## 
## Robust Standard Errors:
##          Estimate  Std. Error   t value Pr(>|t|)
## mu     113.690205    0.385913  294.6003 0.000000
## ar1      1.000000    0.000874 1143.9489 0.000000
## ma1     -0.059036    0.028456   -2.0747 0.038019
## omega    0.152889    0.129386    1.1816 0.237345
## alpha1   0.081915    0.016830    4.8674 0.000001
## beta1    0.917085    0.020337   45.0946 0.000000
## shape    4.211412    0.421497    9.9916 0.000000
## 
## LogLikelihood : -4224.874 
## 
## Information Criteria
## ------------------------------------
##                    
## Akaike       6.7226
## Bayes        6.7512
## Shibata      6.7225
## Hannan-Quinn 6.7333
## 
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
##                         statistic p-value
## Lag[1]                     0.6016  0.4380
## Lag[2*(p+q)+(p+q)-1][5]    3.3735  0.2619
## Lag[4*(p+q)+(p+q)-1][9]    6.9326  0.1346
## d.o.f=2
## H0 : No serial correlation
## 
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
##                         statistic p-value
## Lag[1]                      0.170  0.6801
## Lag[2*(p+q)+(p+q)-1][5]     1.073  0.8429
## Lag[4*(p+q)+(p+q)-1][9]     1.499  0.9556
## d.o.f=2
## 
## Weighted ARCH LM Tests
## ------------------------------------
##             Statistic Shape Scale P-Value
## ARCH Lag[3]    0.6494 0.500 2.000  0.4203
## ARCH Lag[5]    0.6968 1.440 1.667  0.8244
## ARCH Lag[7]    0.8238 2.315 1.543  0.9404
## 
## Nyblom stability test
## ------------------------------------
## Joint Statistic:  7.6532
## Individual Statistics:                
## mu     0.0005036
## ar1    0.7415155
## ma1    0.2570143
## omega  0.5953135
## alpha1 1.2333304
## beta1  0.8482511
## shape  1.0509154
## 
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic:          1.69 1.9 2.35
## Individual Statistic:     0.35 0.47 0.75
## 
## Sign Bias Test
## ------------------------------------
##                    t-value   prob sig
## Sign Bias           0.4964 0.6197    
## Negative Sign Bias  0.4132 0.6795    
## Positive Sign Bias  1.1354 0.2564    
## Joint Effect        1.5176 0.6782    
## 
## 
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
##   group statistic p-value(g-1)
## 1    20     26.29       0.1223
## 2    30     27.14       0.5642
## 3    40     35.07       0.6495
## 4    50     51.37       0.3812
## 
## 
## Elapsed time : 0.3054609

GARCH MODEL2 WITH ARMA (2,2)

# Same GARCH(1,1) variance specification, but with an ARMA(2,2) mean equation.
garch_spec_22 <- ugarchspec(
  variance.model = list(model = "sGARCH", garchOrder = c(1, 1)),
  mean.model = list(armaOrder = c(2, 2)),
  distribution.model = "std"
)
nflx_garch2 <- ugarchfit(spec = garch_spec_22, data = adj_nflx)
nflx_garch2
## 
## *---------------------------------*
## *          GARCH Model Fit        *
## *---------------------------------*
## 
## Conditional Variance Dynamics    
## -----------------------------------
## GARCH Model  : sGARCH(1,1)
## Mean Model   : ARFIMA(2,0,2)
## Distribution : std 
## 
## Optimal Parameters
## ------------------------------------
##          Estimate  Std. Error    t value Pr(>|t|)
## mu     113.204458    0.410537   275.7470 0.000000
## ar1      0.022298    0.004577     4.8720 0.000001
## ar2      0.981119    0.004625   212.1114 0.000000
## ma1      0.915764    0.000015 60943.5104 0.000000
## ma2     -0.070207    0.000592  -118.5324 0.000000
## omega    0.155211    0.103317     1.5023 0.133023
## alpha1   0.082929    0.012137     6.8326 0.000000
## beta1    0.916071    0.013341    68.6661 0.000000
## shape    4.166105    0.391210    10.6493 0.000000
## 
## Robust Standard Errors:
##          Estimate  Std. Error    t value Pr(>|t|)
## mu     113.204458    0.294881   383.8983 0.000000
## ar1      0.022298    0.005400     4.1294 0.000036
## ar2      0.981119    0.005623   174.4833 0.000000
## ma1      0.915764    0.000013 68575.9719 0.000000
## ma2     -0.070207    0.000415  -169.3417 0.000000
## omega    0.155211    0.129171     1.2016 0.229520
## alpha1   0.082929    0.016732     4.9561 0.000001
## beta1    0.916071    0.020324    45.0734 0.000000
## shape    4.166105    0.413512    10.0749 0.000000
## 
## LogLikelihood : -4221.8 
## 
## Information Criteria
## ------------------------------------
##                    
## Akaike       6.7209
## Bayes        6.7576
## Shibata      6.7208
## Hannan-Quinn 6.7347
## 
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
##                          statistic   p-value
## Lag[1]                      0.9913 3.194e-01
## Lag[2*(p+q)+(p+q)-1][11]    8.9600 8.242e-06
## Lag[4*(p+q)+(p+q)-1][19]   13.9519 5.984e-02
## d.o.f=4
## H0 : No serial correlation
## 
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
##                         statistic p-value
## Lag[1]                     0.1380  0.7103
## Lag[2*(p+q)+(p+q)-1][5]    0.9734  0.8657
## Lag[4*(p+q)+(p+q)-1][9]    1.3741  0.9652
## d.o.f=2
## 
## Weighted ARCH LM Tests
## ------------------------------------
##             Statistic Shape Scale P-Value
## ARCH Lag[3]    0.5714 0.500 2.000  0.4497
## ARCH Lag[5]    0.6049 1.440 1.667  0.8523
## ARCH Lag[7]    0.7411 2.315 1.543  0.9517
## 
## Nyblom stability test
## ------------------------------------
## Joint Statistic:  7.3642
## Individual Statistics:              
## mu     0.01413
## ar1    0.05526
## ar2    0.03545
## ma1    0.12500
## ma2    0.08744
## omega  0.60695
## alpha1 1.29812
## beta1  0.87017
## shape  1.07678
## 
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic:          2.1 2.32 2.82
## Individual Statistic:     0.35 0.47 0.75
## 
## Sign Bias Test
## ------------------------------------
##                    t-value   prob sig
## Sign Bias           0.7645 0.4447    
## Negative Sign Bias  0.2577 0.7967    
## Positive Sign Bias  1.2745 0.2027    
## Joint Effect        1.8533 0.6034    
## 
## 
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
##   group statistic p-value(g-1)
## 1    20     18.16       0.5120
## 2    30     22.61       0.7940
## 3    40     31.33       0.8042
## 4    50     37.47       0.8855
## 
## 
## Elapsed time : 0.6291411

GARCH MODEL3 WITH ARMA ORDER (3,3)

# GARCH(1,1) variance specification with an ARMA(3,3) mean equation.
garch_spec_33 <- ugarchspec(
  variance.model = list(model = "sGARCH", garchOrder = c(1, 1)),
  mean.model = list(armaOrder = c(3, 3)),
  distribution.model = "std"
)
nflx_garch3 <- ugarchfit(spec = garch_spec_33, data = adj_nflx)
## Warning in arima(data, order = c(modelinc[2], 0, modelinc[3]), include.mean =
## modelinc[1], : possible convergence problem: optim gave code = 1
nflx_garch3
## 
## *---------------------------------*
## *          GARCH Model Fit        *
## *---------------------------------*
## 
## Conditional Variance Dynamics    
## -----------------------------------
## GARCH Model  : sGARCH(1,1)
## Mean Model   : ARFIMA(3,0,3)
## Distribution : std 
## 
## Optimal Parameters
## ------------------------------------
##          Estimate  Std. Error   t value Pr(>|t|)
## mu     112.980156    1.406618   80.3205  0.00000
## ar1      1.506436    0.001629  924.9161  0.00000
## ar2     -1.357242    0.001712 -792.6000  0.00000
## ar3      0.853159    0.001952  436.9622  0.00000
## ma1     -0.572363    0.027208  -21.0362  0.00000
## ma2      0.897680    0.019185   46.7910  0.00000
## ma3     -0.037132    0.027646   -1.3431  0.17923
## omega    0.155388    0.103996    1.4942  0.13513
## alpha1   0.082432    0.012089    6.8186  0.00000
## beta1    0.916568    0.013289   68.9721  0.00000
## shape    4.197099    0.399745   10.4994  0.00000
## 
## Robust Standard Errors:
##          Estimate  Std. Error    t value Pr(>|t|)
## mu     112.980156    0.531122   212.7198 0.000000
## ar1      1.506436    0.000627  2401.4776 0.000000
## ar2     -1.357242    0.000584 -2325.6007 0.000000
## ar3      0.853159    0.000228  3746.9702 0.000000
## ma1     -0.572363    0.028687   -19.9518 0.000000
## ma2      0.897680    0.019199    46.7564 0.000000
## ma3     -0.037132    0.030335    -1.2241 0.220926
## omega    0.155388    0.131475     1.1819 0.237252
## alpha1   0.082432    0.016700     4.9361 0.000001
## beta1    0.916568    0.020194    45.3886 0.000000
## shape    4.197099    0.425247     9.8698 0.000000
## 
## LogLikelihood : -4221.973 
## 
## Information Criteria
## ------------------------------------
##                    
## Akaike       6.7243
## Bayes        6.7692
## Shibata      6.7242
## Hannan-Quinn 6.7412
## 
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
##                          statistic p-value
## Lag[1]                       1.031  0.3099
## Lag[2*(p+q)+(p+q)-1][17]     8.438  0.8273
## Lag[4*(p+q)+(p+q)-1][29]    16.372  0.2998
## d.o.f=6
## H0 : No serial correlation
## 
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
##                         statistic p-value
## Lag[1]                     0.1312  0.7172
## Lag[2*(p+q)+(p+q)-1][5]    0.9880  0.8624
## Lag[4*(p+q)+(p+q)-1][9]    1.3847  0.9645
## d.o.f=2
## 
## Weighted ARCH LM Tests
## ------------------------------------
##             Statistic Shape Scale P-Value
## ARCH Lag[3]    0.5769 0.500 2.000  0.4475
## ARCH Lag[5]    0.6430 1.440 1.667  0.8408
## ARCH Lag[7]    0.7626 2.315 1.543  0.9488
## 
## Nyblom stability test
## ------------------------------------
## Joint Statistic:  8.041
## Individual Statistics:              
## mu     0.01276
## ar1    0.04650
## ar2    0.03577
## ar3    0.03583
## ma1    0.52343
## ma2    0.53937
## ma3    0.12094
## omega  0.61567
## alpha1 1.23718
## beta1  0.85103
## shape  1.13167
## 
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic:          2.49 2.75 3.27
## Individual Statistic:     0.35 0.47 0.75
## 
## Sign Bias Test
## ------------------------------------
##                    t-value   prob sig
## Sign Bias           0.8284 0.4076    
## Negative Sign Bias  0.3194 0.7495    
## Positive Sign Bias  1.3735 0.1699    
## Joint Effect        2.2187 0.5283    
## 
## 
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
##   group statistic p-value(g-1)
## 1    20     15.58       0.6849
## 2    30     18.13       0.9415
## 3    40     29.61       0.8612
## 4    50     43.98       0.6764
## 
## 
## Elapsed time : 0.63712

Table to Show Akaike Values for the GARCH Models

GARCH(2,2) and GARCH(3,3) had lower p-values for the weighted Ljung-Box test on standardized residuals, indicating remaining serial correlation in their residuals.

GARCH(1,1) had the best metrics and outperformed the other two GARCH models. This is the model that will be used.

# Summarize the Akaike information criteria reported by the three GARCH fits.
GARCH_Models <- c('GARCH(1,1)', 'GARCH(2,2)','GARCH(3,3)')
# Store the AIC values as numerics (not character strings) so they compare
# and sort correctly.
Akaike <- c(6.722, 6.721, 6.724)

gtable <- data.frame(GARCH_Models, Akaike)
gtable
##   GARCH_Models Akaike
## 1   GARCH(1,1)  6.722
## 2   GARCH(2,2)  6.721
## 3   GARCH(3,3)  6.724
# Caption markup fixed: the closing tag was '<b>' instead of '</b>'.
kable(gtable, format = 'html', caption = '<b>Garch Model - Akaike Values</b>',position = 'center', align = 'cc') %>%
  kable_styling(full_width = TRUE, position = 'center')%>%
  kable_classic(html_font = 'Times New Roman')
Garch Model - Akaike Values
GARCH_Models Akaike
GARCH(1,1) 6.722
GARCH(2,2) 6.721
GARCH(3,3) 6.724

FORECASTING

Forecasting with the bootstrap method to forecast both the series and the conditional variances.

nflx_garch1 with arma order (1,1)

# Bootstrap forecast: simulate forward paths to forecast both the series and
# the conditional variances. "Partial" resamples the standardized residuals
# while treating the fitted parameters as fixed.
nflx_predict <- ugarchboot(nflx_garch1, n.ahead=30,method=c("Partial","Full")[1])
nflx_predict
## 
## *-----------------------------------*
## *     GARCH Bootstrap Forecast      *
## *-----------------------------------*
## Model : sGARCH
## n.ahead : 30
## Bootstrap method:  partial
## Date (T[0]): 2021-11-15
## 
## Series (summary):
##         min   q.25   mean   q.75    max forecast[analytic]
## t+1  586.87 673.93 681.20 689.12 757.12             679.43
## t+2  560.49 672.15 681.20 690.66 804.67             679.43
## t+3  580.38 670.32 681.49 692.11 831.02             679.43
## t+4  570.78 668.73 682.70 696.55 825.64             679.43
## t+5  550.79 666.31 683.22 699.21 840.86             679.43
## t+6  568.16 665.27 685.05 701.84 855.92             679.43
## t+7  560.54 664.21 685.89 704.98 895.48             679.43
## t+8  570.64 663.98 688.38 709.04 901.18             679.43
## t+9  567.82 663.15 689.66 713.30 885.20             679.43
## t+10 573.34 664.71 692.29 715.24 900.84             679.43
## .....................
## 
## Sigma (summary):
##          min  q0.25   mean  q0.75    max forecast[analytic]
## t+1  12.9669 12.967 12.967 12.967 12.967             12.967
## t+2  12.4239 12.472 13.003 12.974 29.260             12.966
## t+3  11.9041 12.081 13.015 13.099 34.827             12.966
## t+4  11.4087 11.796 12.973 13.186 34.671             12.965
## t+5  10.9379 11.550 13.004 13.436 33.220             12.965
## t+6  10.5004 11.283 13.000 13.644 32.104             12.964
## t+7  10.1710 11.127 13.068 13.710 43.231             12.963
## t+8   9.7792 11.005 13.135 13.988 43.014             12.963
## t+9   9.4474 11.011 13.279 14.015 42.372             12.962
## t+10  9.1343 10.829 13.195 14.041 41.803             12.962
## .....................
# which = 2 plots the forecasted series with bootstrap error bands.
plot(nflx_predict,which=2)

Predicted Prices by GARCH Models

# Extract the bootstrap series summary (min/quartiles/mean/max per horizon).
nflx_predict_df <- as.data.frame(nflx_predict, which="series", type="summary")
nflx_predict_df
##           t+1      t+2      t+3      t+4      t+5      t+6      t+7      t+8
## min  586.8727 560.4925 580.3832 570.7837 550.7910 568.1595 560.5356 570.6396
## q.25 673.9271 672.1545 670.3239 668.7288 666.3128 665.2665 664.2135 663.9809
## mean 681.2002 681.2022 681.4897 682.6959 683.2213 685.0502 685.8937 688.3813
## q.75 689.1195 690.6584 692.1118 696.5533 699.2119 701.8359 704.9803 709.0429
## max  757.1204 804.6695 831.0242 825.6447 840.8649 855.9191 895.4823 901.1796
##           t+9     t+10     t+11     t+12      t+13      t+14     t+15      t+16
## min  567.8193 573.3405 581.1976 579.4033  554.8548  531.9298 524.0622  515.8633
## q.25 663.1464 664.7124 666.8361 665.6952  666.6221  664.9383 665.3408  666.4584
## mean 689.6614 692.2884 693.5479 694.1471  694.9478  696.0688 697.4145  698.7719
## q.75 713.2974 715.2414 717.7133 717.8888  719.1906  720.7093 723.6458  728.1678
## max  885.1973 900.8435 905.2487 937.0410 1012.6243 1050.7914 990.1137 1002.6399
##           t+17      t+18      t+19      t+20      t+21      t+22      t+23
## min   504.2789  502.8419  489.0520  506.7233  512.1404  503.1369  464.9144
## q.25  666.8987  664.6277  666.8792  666.1957  665.1895  664.2369  664.9360
## mean  699.3504  700.1285  701.2160  703.2033  704.6703  705.9801  707.8804
## q.75  727.6503  729.8916  731.0161  733.9533  736.7755  741.7456  740.8535
## max  1000.0113 1031.0192 1012.8966 1029.0389 1043.9284 1042.4031 1029.1598
##           t+24      t+25      t+26      t+27      t+28      t+29      t+30
## min   465.0879  488.6456  512.2124  525.3471  485.6206  480.6989  475.4839
## q.25  666.1787  666.6367  664.9942  669.8717  668.7352  665.9173  667.8820
## mean  708.6575  710.0897  711.8356  713.1691  713.9477  715.1216  716.8875
## q.75  741.7617  744.4483  750.6790  748.7896  750.7050  754.1577  755.7193
## max  1049.7215 1087.4207 1127.5932 1093.9125 1217.3089 1301.9505 1335.2386
# Transpose so each row is a forecast horizon (t+1 ... t+30); t() coerces the
# data frame to a matrix, and as.data.frame converts it back.
pred_table <- as.data.frame(t(nflx_predict_df))
pred_table
##           min     q.25     mean     q.75       max
## t+1  586.8727 673.9271 681.2002 689.1195  757.1204
## t+2  560.4925 672.1545 681.2022 690.6584  804.6695
## t+3  580.3832 670.3239 681.4897 692.1118  831.0242
## t+4  570.7837 668.7288 682.6959 696.5533  825.6447
## t+5  550.7910 666.3128 683.2213 699.2119  840.8649
## t+6  568.1595 665.2665 685.0502 701.8359  855.9191
## t+7  560.5356 664.2135 685.8937 704.9803  895.4823
## t+8  570.6396 663.9809 688.3813 709.0429  901.1796
## t+9  567.8193 663.1464 689.6614 713.2974  885.1973
## t+10 573.3405 664.7124 692.2884 715.2414  900.8435
## t+11 581.1976 666.8361 693.5479 717.7133  905.2487
## t+12 579.4033 665.6952 694.1471 717.8888  937.0410
## t+13 554.8548 666.6221 694.9478 719.1906 1012.6243
## t+14 531.9298 664.9383 696.0688 720.7093 1050.7914
## t+15 524.0622 665.3408 697.4145 723.6458  990.1137
## t+16 515.8633 666.4584 698.7719 728.1678 1002.6399
## t+17 504.2789 666.8987 699.3504 727.6503 1000.0113
## t+18 502.8419 664.6277 700.1285 729.8916 1031.0192
## t+19 489.0520 666.8792 701.2160 731.0161 1012.8966
## t+20 506.7233 666.1957 703.2033 733.9533 1029.0389
## t+21 512.1404 665.1895 704.6703 736.7755 1043.9284
## t+22 503.1369 664.2369 705.9801 741.7456 1042.4031
## t+23 464.9144 664.9360 707.8804 740.8535 1029.1598
## t+24 465.0879 666.1787 708.6575 741.7617 1049.7215
## t+25 488.6456 666.6367 710.0897 744.4483 1087.4207
## t+26 512.2124 664.9942 711.8356 750.6790 1127.5932
## t+27 525.3471 669.8717 713.1691 748.7896 1093.9125
## t+28 485.6206 668.7352 713.9477 750.7050 1217.3089
## t+29 480.6989 665.9173 715.1216 754.1577 1301.9505
## t+30 475.4839 667.8820 716.8875 755.7193 1335.2386